# -------------------------------------------------------------------
# Create datasets for BART analysis from a Stata .dta file

# Using R 2.13.0
# Start from an empty workspace: save.image() at the end of this section
# writes out every object that is still present.
rm(list = ls(all.names = TRUE))
library(foreign)

# Point this at the folder that contains the Stata file before running.
setwd("")

data4 <- read.dta("daughter_analytic_bart.dta")

# =================================================================
# Sample selection: keep the analytic sample only.
data4 <- subset(data4, out_sample == "Analytic Sample")

# Pre-treatment covariates used in both analysis datasets.
sel <- c("c_female", "c_age", "x_oldage", "c_born", "c_educ")

# Variable specification for the BART models: treatment as a factor.
data4$fx_daughter <- as.factor(data4$fx_daughter)

# Outside the US, period is coded 1..6; recode it to 2002, 2004, ..., 2012.
# The waves are processed in ascending order, one at a time, so a value that
# has already been recoded can never match a later wave number.
non_us <- data4$cntry != "US"
for (wave in 1:6) {
  data4$period[non_us & data4$period == wave] <- 2000 + 2 * wave
}

# Years of education: cap at 20 years (as in the ESS data).
data4$c_educ[which(data4$c_educ > 20)] <- 20
# x_oldage == -1 is treated as missing (one observation; possibly an error).
is.na(data4$x_oldage) <- which(data4$x_oldage == -1)

data4$cntry <- as.factor(data4$cntry)

# Party ID dataset: GB and US only; rows with any missing value are dropped.
data1 <- subset(
  data4,
  cntry %in% c("GB", "US"),
  select = c("d_repscale", "fx_daughter", sel, "period", "cntry")
)
data1 <- data1[complete.cases(data1), ]

# Political ideology dataset: all countries; rows with any missing value
# are dropped.
data2 <- subset(
  data4,
  select = c("d_conscale100", "fx_daughter", sel, "period", "cntry")
)
data2 <- data2[complete.cases(data2), ]

# Leave only data1 and data2 in the workspace before it is saved.
rm(sel, data4, non_us, wave)

setwd("e:/Copy/data/daughter/bart/")
save.image("daughter_bart.RData")

# =================================================================
# for party ID Sample : US and GB
# Reload the prepared workspace and keep only the party ID data (data1).
load("daughter_bart.RData")
rm(data2)

cntry.list <- c("US", "GB")
plotvars <- c("c_female", "c_born", "c_age", "x_oldage", "c_educ")
# Column 1 of data1 becomes the outcome Y, column 2 the treatment Tr.
names(data1)[1:2] <- c("Y", "Tr")

# For every country and every covariate in plotvars, build two prediction
# sets and save the workspace to id1<country><covariate>.RData:
#   S0 - one copy of X per grid value of the covariate, with Tr set to 0
#   S1 - the same rows with Tr set to 1
# save.image() stores every object in the workspace (y, Tr, X, values, S0,
# S1, covar, ...), so temporaries are removed before it is called.
for (cc in seq_along(cntry.list)) {

  data <- subset(data1, cntry == cntry.list[cc])
  data$period <- as.factor(data$period)

  for (u in seq_along(plotvars)) {

    covar <- plotvars[u]

    y <- data$Y
    Tr <- data$Tr
    X <- subset(data, select = c("Tr", plotvars, "period"))

    # Grid of values at which the covariate is fixed: factor levels, or the
    # sorted distinct observed values for non-factors.
    if (is.factor(X[, covar])) {
      values <- levels(X[, covar])
    } else {
      values <- sort(unique(X[, covar]))
    }
    # Three covariates use an evenly spaced grid over the observed range
    # instead of every distinct value.
    if (covar == "c_age") {
      values <- seq(from = min(values), to = max(values), by = 10)
    }
    if (covar == "x_oldage") {
      values <- seq(from = min(values), to = max(values), by = 5)
    }
    if (covar == "c_educ") {
      values <- seq(from = min(values), to = max(values), by = 5)
    }

    # Stack one copy of X per grid value with a single rbind() call.
    # Growing S0 and S1 separately inside a loop recopied the accumulated
    # data frame on every pass and did the same stacking twice; S0 and S1
    # differ only in Tr, so the stack is built once and reused.
    stacked <- do.call(rbind, lapply(seq_along(values), function(k) {
      copy_k <- X
      copy_k[, covar] <- values[k]
      copy_k
    }))

    # Assigning level labels turned a factor covariate into character;
    # restore the factor with the original level set.
    if (is.factor(X[, covar])) {
      stacked[, covar] <- factor(stacked[, covar], levels = levels(X[, covar]))
    }

    # Treatment is fixed at 0 / 1 and coded with the same levels as X$Tr.
    S0 <- stacked
    S0$Tr <- factor(rep(0, nrow(stacked)), levels = levels(X$Tr))
    S1 <- stacked
    S1$Tr <- factor(rep(1, nrow(stacked)), levels = levels(X$Tr))

    rm(stacked)
    save.image(file = paste("id1", cntry.list[cc], covar, ".RData", sep = ""))
  } # loop for each covar
} # loop for each country


# =================================================================
# for political ideology Sample : GSS and ESS

# Reload the prepared workspace and keep only the ideology data (data2).
load("daughter_bart.RData")
rm(data1)

# One run per country present in data2, plus a pooled run labelled "ESS"
# that uses every country except the US.
cntry.list <- c(as.character(unique(data2$cntry)), "ESS")
plotvars <- c("c_female", "c_born", "c_age", "x_oldage", "c_educ")
# Column 1 of data2 becomes the outcome Y, column 2 the treatment Tr.
names(data2)[1:2] <- c("Y", "Tr")

# For every entry of cntry.list and every covariate in plotvars, build two
# prediction sets and save the workspace to id2<country><covariate>.RData:
#   S0 - one copy of X per grid value of the covariate, with Tr set to 0
#   S1 - the same rows with Tr set to 1
# save.image() stores every object in the workspace (y, Tr, X, values, S0,
# S1, covar, ...), so temporaries are removed before it is called.
for (cc in seq_along(cntry.list)) {

  if (cntry.list[cc] == "ESS") {
    data <- subset(data2, cntry != "US")
  } else {
    data <- subset(data2, cntry == cntry.list[cc])
  }
  data$period <- as.factor(data$period)

  for (u in seq_along(plotvars)) {

    covar <- plotvars[u]

    y <- data$Y
    Tr <- data$Tr
    # The pooled ESS run additionally keeps the country column in X.
    if (cntry.list[cc] == "ESS") {
      X <- subset(data, select = c("Tr", plotvars, "period", "cntry"))
    } else {
      X <- subset(data, select = c("Tr", plotvars, "period"))
    }

    # Grid of values at which the covariate is fixed: factor levels, or the
    # sorted distinct observed values for non-factors.
    if (is.factor(X[, covar])) {
      values <- levels(X[, covar])
    } else {
      values <- sort(unique(X[, covar]))
    }
    # Three covariates use an evenly spaced grid over the observed range
    # instead of every distinct value.
    if (covar == "c_age") {
      values <- seq(from = min(values), to = max(values), by = 10)
    }
    if (covar == "x_oldage") {
      values <- seq(from = min(values), to = max(values), by = 5)
    }
    if (covar == "c_educ") {
      values <- seq(from = min(values), to = max(values), by = 5)
    }

    # Stack one copy of X per grid value with a single rbind() call.
    # Growing S0 and S1 separately inside a loop recopied the accumulated
    # data frame on every pass and did the same stacking twice; S0 and S1
    # differ only in Tr, so the stack is built once and reused.
    stacked <- do.call(rbind, lapply(seq_along(values), function(k) {
      copy_k <- X
      copy_k[, covar] <- values[k]
      copy_k
    }))

    # Assigning level labels turned a factor covariate into character;
    # restore the factor with the original level set.
    if (is.factor(X[, covar])) {
      stacked[, covar] <- factor(stacked[, covar], levels = levels(X[, covar]))
    }

    # Treatment is fixed at 0 / 1 and coded with the same levels as X$Tr.
    S0 <- stacked
    S0$Tr <- factor(rep(0, nrow(stacked)), levels = levels(X$Tr))
    S1 <- stacked
    S1$Tr <- factor(rep(1, nrow(stacked)), levels = levels(X$Tr))

    rm(stacked)
    save.image(file = paste("id2", cntry.list[cc], covar, ".RData", sep = ""))
  } # loop for each covar
} # loop for each country


